library('dplyr')
library('dada2')
library('tidyverse')
library('shiny')
library('phyloseq')
library('Biostrings')
library('tibble')
library('hrbrthemes')
library('microbiomeutilities')
library('ggplot2')
library('microViz')
library('markdown')
library('microbiome')
library('ggtext')
library('patchwork')
library('ggpubr')
library('ggrepel')
library('knitr')
library('tibble')
library('decontam')
library('DESeq2')
library('dendextend')
library('vegan')
library('ggforce')
library('corncob')
threads <- 6
## Registered S3 method overwritten by 'ggside':
## method from
## +.gg ggplot2
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## Registered S3 method overwritten by 'ggtree':
## method from
## identify.gg ggfun
After downloading the cpn60 refdb classifier, I added known input sequences to fasta and taxonomy files and saved as: cpn60_v11.seqs.fasta.
# Import taxonomy
qiime tools import --type 'FeatureData[Taxonomy]' --input-format HeaderlessTSVTaxonomyFormat --input-path cpn60_v11_taxonomy_table.txt --output-path cpn60_v11_taxonomy_table.qza
# Import ref seqs
qiime tools import --type 'FeatureData[Sequence]' --input-path cpn60_v11_seqs.fasta --output-path cpn60_v11_seqs.qza
# Train the classifier
qiime feature-classifier fit-classifier-naive-bayes --i-reference-reads cpn60_v11_seqs.qza --i-reference-taxonomy cpn60_v11_taxonomy_table.qza --o-classifier cpn60_classifier_v11.qza
# Trained classifier from paper ref'ed above with known input taxa added is saved as: /202310/cpn60_classifier_v11.qza
# Load in a list of all F reads (only using F reads for hsp60 samples, this is totally fine)
fnFs <- sort(list.files('./euprymna_fastq', pattern='_R1_001.fastq', full.names = TRUE))
# Check file names
fnFs
# Assign file names for the filtered fastq files
sample.names <- c("BacMixA", "BacMixB", "ES114A", "ES114B", "EsApoA", "EsApoB", "kitcon", "MB13A", "MB15A", "MB15B", "MB211A", "MB211B", "MB212A", "MB212B", "SymES114A", "SymES114B", "SymES114VentA", "SymES114VentB", "SymWtA", "SymWtB", "SymWtC", "VfMixA", "VfMixB")
sample.names
euprymna.hsp60.metadata <- read.delim("euprymna_hsp60_metadata.csv", header=TRUE, sep=",")
# Check quality and filter
plotQualityProfile(fnFs[1:2])
# assign file names for filtered fastq files
filtFs <- file.path("euprymna_hsp60_filtered", paste0(sample.names, "_F_filt.fastq.gz"))
names(filtFs) <- sample.names
# trimLeft comes from hsp60 primer length (all = 26 bp)
out <- filterAndTrim(fwd=fnFs, filt=filtFs,
trimLeft=26,
#truncLen = 274,
maxN=0,
maxEE=2,
compress=TRUE, verbose=TRUE, multithread=TRUE, rm.phix=TRUE)
head(out)
# make sure a reasonable number of reads were filtered (keep >50k reads/sample-ish)
# Learn the error rates: look at how trimming affects the quality reports
timestamp()
errF <- learnErrors(filtFs, multithread=threads)
saveRDS(errF, file='errF_sampled.rds')
plotFerr <- plotErrors(errF, nominalQ=T)
suppressWarnings(print(plotFerr))
timestamp()
dadaF <- dada(filtFs, err=errF, multithread=threads, pool=FALSE)
dadaF[[1]]
# 296 sequence variants were inferred from 39500 input unique sequences
# Here you would typically merge reads, but I'm only using F reads so no merging is necessary
# Dereplication is done automatically by dada2 when given file names (which I did) so no additional dereplication step is necessary
# Make an ASV table!
seqtab <- makeSequenceTable(dadaF)
class(seqtab) # matrix array
dim(seqtab) # 23 6171
# inspect the distribution of sequence lengths
table(nchar(getSequences(seqtab))) # 274 6171
# remove chimeras
seqtab.nochim.es <- removeBimeraDenovo(seqtab, method = "consensus", multithread = threads, verbose = TRUE)
# identified 110 bimeras out of 6171 input sequences
dim(seqtab.nochim.es) # 23 6061
sum(seqtab.nochim.es)/sum(seqtab) # 0.9692
# track how many reads were lost during each step of processing:
getN <- function(x) sum(getUniques(x))
summary_tab <- data.frame(row.names=sample.names,
"DADA2 Input"=out[,1],
"Filtered"=out[,2],
"Denoised"=sapply(dadaF, getN),
"Final Read Count"=rowSums(seqtab.nochim.es),
"Percent Reads Retained"=round(rowSums(seqtab.nochim.es)/out[,1]*100, 1))
# save summary table as a file
write.table(summary_tab, "euprymna_hsp60_reads_tracked.tsv", quote=FALSE, sep="\t", col.names = NA)
# write unique seqs to a .fasta file to import into qiime2
uniquesToFasta(seqtab.nochim.es, fout='euprymna-hsp60-repseqs.fasta', ids=colnames(seqtab.nochim.es))
# write ASV table (seqtab.nochim) to a .txt file to import into QIIME2
write.table(t(seqtab.nochim.es), "euprymna-hsp60-seqtab.txt", sep="\t", row.names=TRUE, col.names=NA, quote=FALSE)
# assign taxonomy in QIIME2 using the pretrained classifier created on 3/29/2023
| Table 1. Read Processing Summary. | |||||
| Number of total reads per sample retained at each major read processing step. | |||||
| DADA2.Input | Filtered | Denoised | Final.Read.Count | Percent.Reads.Retained | |
|---|---|---|---|---|---|
| BacMixA | 187727 | 144816 | 143560 | 134514 | 71.7 |
| BacMixB | 243557 | 185214 | 183127 | 169495 | 69.6 |
| ES114A | 213942 | 147339 | 146562 | 146562 | 68.5 |
| ES114B | 236500 | 188218 | 186530 | 186530 | 78.9 |
| EsApoA | 105387 | 59908 | 50130 | 47953 | 45.5 |
| EsApoB | 125013 | 74261 | 61726 | 59218 | 47.4 |
| kitcon | 109487 | 34443 | 33242 | 28694 | 26.2 |
| MB13A | 244337 | 195270 | 192878 | 192802 | 78.9 |
| MB15A | 193187 | 151865 | 151043 | 140966 | 73.0 |
| MB15B | 226335 | 181097 | 179617 | 157443 | 69.6 |
| MB211A | 238219 | 187994 | 184194 | 183945 | 77.2 |
| MB211B | 279112 | 220305 | 215882 | 215786 | 77.3 |
| MB212A | 220679 | 173520 | 170165 | 169809 | 76.9 |
| MB212B | 165352 | 127792 | 124365 | 124252 | 75.1 |
| SymES114A | 145488 | 98491 | 90008 | 85945 | 59.1 |
| SymES114B | 194096 | 116828 | 114422 | 108783 | 56.0 |
| SymES114VentA | 219768 | 132066 | 118801 | 112053 | 51.0 |
| SymES114VentB | 211779 | 159358 | 154424 | 153062 | 72.3 |
| SymWtA | 239707 | 155598 | 145987 | 140516 | 58.6 |
| SymWtB | 173729 | 119739 | 109516 | 105946 | 61.0 |
| SymWtC | 269185 | 195194 | 184417 | 180470 | 67.0 |
| VfMixA | 155317 | 123316 | 122424 | 120974 | 77.9 |
| VfMixB | 225861 | 178305 | 177472 | 175113 | 77.5 |
qiime tools import --type 'FeatureData[Sequence]' --input-path euprymna-hsp60-repseqs.fasta --output-path euprymna-hsp60-repseqs.qza
qiime feature-classifier classify-hybrid-vsearch-sklearn \
--i-query euprymna-hsp60-repseqs.qza \
--i-reference-reads cpn60_v11_seqs.qza \
--i-reference-taxonomy cpn60_v11_taxonomy_table.qza \
--i-classifier cpn60_classifier_v11.qza \
--p-no-prefilter \
--p-maxaccepts all \
--p-maxrejects all \
--p-confidence 0.6 \
--o-classification euprymna_hsp60_classified
qiime tools export \
--input-path euprymna_hsp60_classified.qza \
--output-path euprymna_hsp60_classified
# add a special header for BIOM in the feature-table:
echo -n "#OTU Table" | cat - euprymna-hsp60-seqtab.txt > euprymna-hsp60-biom-table.txt
# convert to BIOM v2.1:
biom convert -i euprymna-hsp60-biom-table.txt -o euprymna-hsp60.biom --table-type="OTU table" --to-hdf5
# open taxonomy tsv and change headers to "#OTUID taxonomy confidence", and save a copy as taxonomy.txt
# now updated table.biom file with that biom-taxonomy info:
biom add-metadata -i euprymna-hsp60.biom -o euprymna-hsp60-wtax.biom --observation-metadata-fp taxonomy.txt --sc-separated taxonomy
# then go import the biom-with-taxonomy in R
asv.seqs <- colnames(seqtab.nochim.es)
asv.headers <- vector(dim(seqtab.nochim.es)[2], mode="character")
for (i in 1:dim(seqtab.nochim.es)[2]) {
asv.headers[i] <- paste(">ASV", i, sep="")
}
# write out a FASTA of final ASV seqs:
asv.fasta <- c(rbind(asv.headers, asv.seqs))
write(asv.fasta, "euprymna-hsp60-ASVs.fasta")
# write out a count table with ASV-IDs instead of ASV-sequences as names:
asv.tab <- t(seqtab.nochim.es)
row.names(asv.tab) <- sub(">", "", asv.headers)
write.table(asv.tab, "euprymna-hsp60-ASVcounts.tsv", sep="\t", quote=F, col.names=NA)
# import euprymna-hsp60 biom as phyloseq object
es.hsp <- import_biom("euprymna-hsp60-wtax.biom")
taxa_names(es.hsp) <- rownames(asv.tab)
tax_table(es.hsp)[1:5]
otu_table(es.hsp)[1:5]
# rename columns in seqtab.nochim.es with sample name (from filt file name)
rownames <- rownames(seqtab.nochim.es)
#use sub() to rename row names
new.rownames <- sub("_.*$", "", rownames)
#print the new row names
print(new.rownames)
sample_names(es.hsp) <- new.rownames
rownames(seqtab.nochim.es) <- new.rownames
sample_data(es.hsp) <- es.hsp.meta
sample_variables(es.hsp)
ranks <- c("domain", "phylum", "class", "order", "family", "genus", "species")
colnames(tax_table(es.hsp))
colnames(tax_table(es.hsp)) <- ranks
tax_table(es.hsp)[1:5]
#format to best hit, do this before you assign "unclassified" labeling!
es.hsp <- format_to_besthit(es.hsp)
view(tax_table(es.hsp))
Run R::decontam to identify contaminating sequencing based on frequency and prevalence in negative extraction control samples. These ASVs are then pruned from the dataset.
# identify contaminants based on frequency
contamdf.freq.es.hsp <- isContaminant(es.hsp, method="frequency", conc="dnaquant")
head(contamdf.freq.es.hsp)
table(contamdf.freq.es.hsp$contaminant) # identified 21 contaminants
head(which(contamdf.freq.es.hsp$contaminant))
# 5, 8, 86, 107, 176, 290
# identify contaminants based on prevalence in negative control
sample_data(es.hsp)$is.neg <- sample_data(es.hsp)$sample.type == "kitcon"
contamdf.prev.es.hsp <- isContaminant(es.hsp, method="prevalence", neg="is.neg")
table(contamdf.prev.es.hsp$contaminant) # identified 7 contaminants
head(which(contamdf.prev.es.hsp$contaminant))
# 53, 57, 141, 180, 268, 1574
x <- which(contamdf.prev.es.hsp$contaminant)
y <- which(contamdf.freq.es.hsp$contaminant)
# prune contaminant ASVs identified by decontam
contam.asvs.es.hsp <- c(x,y)
contam.asvs.es.hsp
contam.asvs.es.hsp[1:28]=paste0("ASV", contam.asvs.es.hsp[1:28])
contam.asvs.es.hsp
pop.taxa = function(es.hsp, contam.asvs.es.hsp){
allTaxa = taxa_names(es.hsp)
allTaxa <- allTaxa[!(allTaxa %in% contam.asvs.es.hsp)]
return(prune_taxa(allTaxa, es.hsp))
}
es.hsp = pop.taxa(es.hsp, contam.asvs.es.hsp)
es.hsp # 6033 taxa and 23 samples
# add reads per sample to phyloseq object metadata
sample_data(es.hsp)$reads.sample <- readcount(es.hsp)
sample_data(es.hsp)
euprymna.hsp60.metadata <- read.csv("es-hsp metadata.csv")
euprymna.hsp60.metadata <- column_to_rownames(euprymna.hsp60.metadata, var="X")
sample_data(es.hsp) <- euprymna.hsp60.metadata
sample_data(es.hsp)
es.hsp <- format_to_besthit(es.hsp)
es.hsp.tx <- as.data.frame(tax_table(es.hsp))
colnames(es.hsp.tx) = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species", "best_hit")
head(es.hsp.tx)
head(tax_table(es.hsp))
es.hsp.tx$Family[es.hsp.tx$Phylum == "k__Bacteria"] <- "Unidentified Bacteria"
es.hsp.tx$Family[es.hsp.tx$Domain == "Unassigned"] <- "Unassigned at Kingdom Level"
unique(es.hsp.tx$Family)
es.hsp.tx <- as.matrix(es.hsp.tx)
tax_table(es.hsp) <- es.hsp.tx
tax_table(es.hsp)[1:5,1:5]
phyloseq::sample_names(es.hsp)
es.sample.order <- c("kitcon", "ES114A", "ES114B", "VfMixA", "VfMixB", "BacMixA", "BacMixB", "EsApoA", "EsApoB", "SymES114VentA", "SymES114VentB", "SymES114A", "SymES114B", "SymWtA", "SymWtB", "SymWtC", "MB13A", "MB15A", "MB15B", "MB211A", "MB211B", "MB212A", "MB212B")
figure2_pal <- c("Aliivibrio fischeri ES114" = "#734f5a",
"Aliivibrio fischeri MB13B1" = "#264653",
"Aliivibrio fischeri MB13B2" = "#2a9d8f",
"Rossellomorea aquimaris TF-12" = "#f4a261",
"Pseudoalteromonas luteoviolacea HI1" = "#e76f51",
"Vibrio litoralis DSM17657" = "#941c2f",
"Other Species" = "#d5bdaf")
tax_palette_plot(figure2_pal)
# Subset only Bacterial Culture samples
es.hsp.culture <- es.hsp %>%
subset_samples(sample.type == "bacterial culture")
# Extract ASV data (abundance counts)
culture.asv.data <- otu_table(es.hsp.culture)
# Extract taxonomy data
culture.tax.data <- tax_table(es.hsp.culture)
# Convert ASV data to data frame and filter
df.culture.asv <- as.data.frame(culture.asv.data)
# Sum the counts for each ASV and filter out those with zero counts across all samples
asv_sums <- rowSums(df.culture.asv)
df.culture.asv <- df.culture.asv[asv_sums > 0, ]
# Make sure to filter the taxonomy data to match the filtered ASV data
df.culture.tax <- as.data.frame(culture.tax.data)
df.culture.tax <- df.culture.tax[asv_sums > 0, ]
# Convert back to otu_table and tax_table
filtered_culture.asv.data <- otu_table(as.matrix(df.culture.asv), taxa_are_rows = TRUE)
filtered_culture.tax.data <- tax_table(as.matrix(df.culture.tax))
# Recreate the phyloseq object with the filtered data
es.hsp.culture.filtered <- phyloseq(filtered_culture.asv.data, filtered_culture.tax.data, sample_data(es.hsp.culture))
# Check the result
print(es.hsp.culture.filtered)
# Subset culture samples
sample_data(es.hsp.culture)$treatment <- factor(sample_data(es.hsp.culture)$treatment, levels = c("ES114 culture", "mixed V. fischeri culture", "mixed bacteria culture"))
panel_1 <- es.hsp.culture %>%
tax_transform("compositional", chain=TRUE)
panel_1 <- comp_barplot(panel_1,
"Species",
n_taxa = 6,
tax_order=c("Aliivibrio fischeri ES114",
"Aliivibrio fischeri MB13B1",
"Aliivibrio fischeri MB13B2",
"Rossellomorea aquimaris TF-12",
"Pseudoalteromonas luteoviolacea HI1",
"Vibrio litoralis DSM17657"),
merge_other=FALSE,
bar_width = 0.975,
other_name = "Other Species",
palette = figure2_pal
) +
facet_col(~ treatment, scales="free_y") +
labs(y = "Relative Abundance",
title = "Observed Strains",
fill = "Input Strains") +
scale_y_percent(limits = c(0, 1.05)) +
theme_biome_utils() +
theme(legend.text = element_text(face = "italic", size = 8),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.title.y = element_text(size = 9),
axis.title.x = element_text(size = 9),
strip.text = element_text(hjust = 0.5, face = "bold"),
plot.title = element_text(size = 10, hjust = 0, face = "bold"),
legend.title = element_text(size = 10),
legend.position = "left",
strip.background = element_blank(),
panel.border = element_blank(),
legend.key.width = unit(0.3, "cm"),
legend.key.height = unit(0.3, "cm")) +
#legend.box.margin = margin(0, 2, 0, 0, "cm")) +
coord_flip()
pdf("panel_1v2.pdf", width=7, height=7)
panel_1 +
plot_layout(heights = c(2, 2, 2), ncol = 1) +
theme(plot.margin = unit(c(1, 1, 0, 1), "cm")) # Top, Right, Bottom, Left
dev.off()
library(msa)
library(ggtree)
library(phangorn)
library(ggtree)
library(ape)
vfhsp60 <- readDNAStringSet("hsp60_ncbi_vf.fasta")
names(vfhsp60) <- make.unique(names(vfhsp60))
# Perform multiple sequence alignment using ClustalW (you can choose other tools like ClustalOmega or MUSCLE)
alignment <- msa(vfhsp60, method = "ClustalW")
# Convert the alignment to a DNAStringSet object
aligned_seqs <- as(alignment, "DNAStringSet")
# Convert the DNAStringSet to a matrix suitable for phylogenetic analysis
alignment_matrix <- as.matrix(aligned_seqs)
# Convert the alignment matrix to a phyDat object
alignment_phyDat <- phyDat(alignment_matrix, type = "DNA")
# Create a distance matrix
dist_matrix <- dist.ml(alignment_phyDat)
# Perform neighbor joining to create a tree
tree_nj <- NJ(dist_matrix)
# Ensure the tip labels of the tree match the sequence names
tree_nj$tip.label <- names(vfhsp60)
# Perform maximum likelihood estimation to refine the tree
fit <- pml(tree_nj, data = alignment_phyDat)
fit <- optim.pml(fit, model = "GTR", optGamma = TRUE)
# If rooting the tree, identify the label for V.logei
#v_logei_label <- "V.logei" # Adjust this to match the exact label used in your sequences
# Root the tree using V.logei as the outgroup
#rooted_tree <- root(fit$tree, outgroup = v_logei_label, resolve.root = TRUE)
#ladderized_tree <- ladderize(rooted_tree)
ladderized_tree <- ladderize(fit$tree)
# Plot the ladderized and rooted phylogenetic tree
panel_3 <- ggtree(ladderized_tree) +
geom_tiplab(size = 5,
#offset = 0.001 # Adjust offset as needed
) +
geom_treescale() +
theme_tree() +
theme(plot.margin = unit(c(1, 2, 1, 1), "cm"),
legend.position="left")
pdf("panel_3.pdf", height = 13, width = 14)
panel_3
dev.off()
# add phytree to es.hsp
random_tree = rtree(ntaxa(es.hsp), rooted=TRUE, tip.label=taxa_names(es.hsp))
phy_tree(es.hsp) <- random_tree
# Look at current naming scheme
head(tax_table(es.hsp)[, "Species"])
# Function to remove strain names from taxa names
remove_strain_names <- function(taxa_name) {
# Use gsub to remove anything after the second part of the name
gsub("(\\w+\\s\\w+).*", "\\1", taxa_name)
}
# Duplicate es.hsp phyloseq object
es.hsp.2 <- es.hsp
# Apply the function to all taxa names in the phyloseq object
tax_table(es.hsp.2) <- apply(tax_table(es.hsp.2), 2, function(x) remove_strain_names(x))
# Example to check if it worked (assuming tax_table has "Species" as a column)
head(tax_table(es.hsp.2)[, "Species"])
# Extract the OTU and taxonomy tables
otu_data <- otu_table(es.hsp.2)
tax_data <- tax_table(es.hsp.2)
# Identify which taxa belong to "Aliivibrio fischeri"
fischeri_indices <- which(tax_data[, "Species"] == "Aliivibrio fischeri")
# Calulate the total abundance for each sample
total_abundance <- colSums(otu_data)
# Calculate the abundance of "fischeri" in each sample
fischeri_abundance <- colSums(otu_data[fischeri_indices, ])
# Calculate as a percentage
fischeri_percentage <- (fischeri_abundance / total_abundance) * 100
# Access the sample_data of the phyloseq object
es.hsp.meta <- sample_data(es.hsp.2)
# Add the fischeri_percentage as a new column
es.hsp.meta$fischeri_percentage <- fischeri_percentage
# Replace the sample_data in the phyloseq object with the modified sample_data
sample_data(es.hsp.2) <- es.hsp.meta
head(sample_data(es.hsp.2))
# Extract the fischeri_percentage from the sample_data
fischeri_percent_data <- sample_data(es.hsp.2)$fischeri_percentage
# Create a new label in the es.hsp sample data that concatonates the sample name and the fischeri percentage (rather than trying to put the vibrio percentage on a different axis, much more complicated)
new_labels <- paste(sample_names(es.hsp.2), sprintf(" (%.2f%%)", fischeri_percent_data))
sample_data(es.hsp.2)$FischeriLabel <- paste(sample_data(es.hsp.2)$SAMPLE, new_labels)
phyloseq::sample_names(es.hsp.2)
panel_1b.1_order <- c("EsApoB", "EsApoA", "SymWtC", "SymWtB", "SymWtA", "SymES114B", "SymES114A", "SymES114VentB", "SymES114VentA", "MB212B", "MB212A", "MB211B", "MB211A", "MB15B", "MB15A", "MB13A")
panel_1b.2_pal <- c("Aliivibrio fischeri" = "#104911",
"Vibrio (Genus)" = "lightblue",
"Other Taxa" = "#d5bdaf")
tax_palette_plot(panel_1b.2_pal)
# Use es.hsp.2 for species-level taxa
es.hsp_filtered <- subset_samples(es.hsp.2, sample.type %in% c("juvenile squid", "juvenile squid ventate", "adult squid"))
# Perform the compositional transformation
es.hsp_filtered <- es.hsp_filtered %>%
tax_transform("compositional", chain = TRUE)
# Ensure the tax_table is in the correct format and modify it to label all species within the "Vibrio" genus
tax_table_temp <- as.data.frame(tax_table(es.hsp_filtered))
tax_table_temp$Species <- ifelse(tax_table_temp$Genus == "Vibrio", "Vibrio (Genus)", tax_table_temp$Species)
tax_table(es.hsp_filtered) <- tax_table(as.matrix(tax_table_temp))
# Create the bar plot with n_taxa = 2 for "Aliivibrio fischeri" and the "Vibrio" genus
panel_1b.2 <- comp_barplot(es.hsp_filtered, "Species",
n_taxa = 2, # Only highlight 2 taxa
label = "FischeriLabel",
sample_order = panel_1b.1_order,
merge_other = TRUE, # Merge other taxa into "Other Species"
bar_width = 0.975,
palette = panel_1b.2_pal,
group_by = "treatment",
other_name = "Other Taxa",
tax_order = c("Aliivibrio fischeri", "Vibrio (Genus)")
)
# Wrap Plots
panel_1b.2 <- patchwork::wrap_plots(panel_1b.2, ncol = 1, guides = "collect", scale = "free_x") &
labs(y = "Relative Abundance") &
scale_y_percent(limits = c(0, 1.05)) &
theme_biome_utils() &
theme(
legend.text = element_text(face = "italic", size = 9),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.title.y = element_text(size = 9),
axis.title.x = element_text(size = 8),
strip.text = element_text(hjust = 0.5),
legend.key.width = unit(0.3, "cm"),
legend.key.height = unit(0.3, "cm"),
plot.title = element_text(size = 10, hjust = 0, face = "bold"
),
legend.title = element_blank(),
legend.position = "bottom",
panel.border = element_blank(),
legend.box.margin = margin(0, 2, 0, 0, "cm")
) &
coord_flip()
panel_1b.2 <- panel_1b.2 + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)
pdf("panel_2.pdf", height = 6, width = 5)
par(mar=c(9,5,1,2), cex=1)
panel_1b.2 + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)
dev.off()
panel_4_order <- c("EsApoB", "EsApoA", "SymWtC", "SymWtB", "SymWtA", "SymES114B", "SymES114A", "SymES114VentA", "SymES114VentB", "MB212B", "MB212A", "MB211B", "MB211A", "MB15B", "MB15A", "MB13A")
panel_4_pal <- c("Vibrionaceae" = "lightblue",
"Bradyrhizobiaceae" = "#9c8bb3",
"Methylobacteriaceae" = "#604f78",
"Rhizobiaceae" = "darkseagreen2",
"Rhodobacteraceae" = "darkseagreen3",
"o__Rhodobacterales" = "darkseagreen4",
"c__Proteobacteria" = "indianred1",
"c__Alphaproteobacteria" = "indianred3",
"Alteromonadaceae" = "lightgoldenrod1",
"c__Gammaproteobacteria" = "lightgoldenrod3",
"c__Actinomycetia" = "darkslategray4",
"c__Actinobacteria" = "darkslategray",
"Other Taxa" = "#d5bdaf",
"Unidentified Bacteria" = "blanchedalmond")
tax_palette_plot(panel_4_pal)
panel4_taxorder <- c("Vibrionaceae",
"Bradyrhizobiaceae",
"Methylobacteriaceae",
"Rhizobiaceae",
"Rhodobacteraceae",
"o__Rhodobacterales",
"c__Proteobacteria",
"c__Alphaproteobacteria",
"Alteromonadaceae",
"c__Gammaproteobacteria",
"c__Actinomycetia",
"c__Actinobacteria",
"Other Taxa",
"Unidentified Bacteria")
es.hsp.panel4 <- es.hsp.2
# Extract the OTU and taxonomy tables
otu_data <- otu_table(es.hsp.panel4)
tax_data <- tax_table(es.hsp.panel4)
# Identify which taxa belong to "Aliivibrio fischeri"
nonfischeri_indices <- which(tax_data[, "Genus"] != "Aliivibrio")
# Calulate the total abundance for each sample
total_abundance <- colSums(otu_data)
# Calculate the abundance of "fischeri" in each sample
nonfischeri_abundance <- colSums(otu_data[nonfischeri_indices, ])
# Calculate as a percentage
nonfischeri_percentage <- (nonfischeri_abundance / total_abundance) * 100
# Access the sample_data of the phyloseq object
es.hsp.panel4.meta <- sample_data(es.hsp.panel4)
# Add the fischeri_percentage as a new column
es.hsp.panel4.meta$nonfischeri_percentage <- nonfischeri_percentage
# Replace the sample_data in the phyloseq object with the modified sample_data
sample_data(es.hsp.panel4) <- es.hsp.panel4.meta
head(sample_data(es.hsp.panel4))
# Extract the fischeri_percentage from the sample_data
nonfischeri_percent_data <- sample_data(es.hsp.panel4)$nonfischeri_percentage
# Create a new label in the es.hsp sample data that concatonates the sample name and the fischeri percentage (rather than trying to put the vibrio percentage on a different axis, much more complicated)
new_labels <- paste(sample_names(es.hsp.panel4), sprintf(" (%.2f%%)", nonfischeri_percent_data))
sample_data(es.hsp.panel4)$NonFischeriLabel <- paste(sample_data(es.hsp.panel4)$SAMPLE, new_labels)
# Subset squid samples
es.hsp.panel4 <- subset_samples(es.hsp.panel4, sample.type %in% c("juvenile squid", "juvenile squid ventate", "adult squid"))
# Remove Aliivibrio fischeri taxa
es.hsp.panel4 <- subset_taxa(es.hsp.panel4, Genus != "Aliivibrio")
# also maybe remove Family != "Unassigned at"
# Compositional transformation
es.hsp.panel4 <- es.hsp.panel4 %>%
tax_transform("compositional", chain = TRUE)
panel4 <- comp_barplot(es.hsp.panel4, "Family",
n_taxa = 13, # Only highlight 2 taxa
label = "NonFischeriLabel",
sample_order = panel_4_order,
merge_other = TRUE, # Merge other taxa into "Other Species"
bar_width = 0.975,
palette = panel_4_pal,
group_by = "treatment",
other_name = "Other Taxa",
tax_order = panel4_taxorder
#tax_order = c("Aliivibrio fischeri", "Vibrio (Genus)")
)
# Wrap Plots
panel4 <- patchwork::wrap_plots(panel4, ncol = 1, guides = "collect", scale = "free_x") &
labs(y = "Relative Abundance") &
scale_y_percent(limits = c(0, 1.05)) &
theme_biome_utils() &
theme(
legend.text = element_text(face = "italic", size = 8),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.title.y = element_text(size = 9),
axis.title.x = element_text(size = 8),
strip.text = element_text(hjust = 0.5),
legend.key.width = unit(0.3, "cm"),
legend.key.height = unit(0.3, "cm"),
plot.title = element_text(size = 10, hjust = 0, face = "bold"),
legend.title = element_blank(),
legend.position = "bottom",
panel.border = element_blank(),
legend.box.margin = margin(0, 2, 0, 0, "cm")
) &
guides(fill = guide_legend(ncol = 3)) &
coord_flip()
panel4 <- panel4 +
plot_layout(heights = c(2,3,2,2,7))
pdf("panel_4.pdf", height = 7, width = 5)
par(mar=c(9,5,1,2), cex=1)
panel4 + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)
dev.off()